home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Pascal Super Library
/
Pascal Super Library (CW International)(1997).bin
/
COMPILER
/
LYSRC
/
YACC.Y
< prev
next >
Wrap
Text File
|
1993-01-23
|
22KB
|
812 lines
/* YACC.Y: Yacc grammar for Yacc main program. 2-17-91, 4-30-91 AG
To bootstrap Yacc, use Yacc iself to compile this grammar, then
run tpc on the generated program.
Note:
This is not entirely the `official' syntax introduced by Johnson, but it
should be compatible with UNIX Yacc (except for the differences specified
in the program header, below), as described in the UNIX manual, including
the language elements entitled as "old features supported but not
encouraged."
Bugs:
- Processes $$'s, $i's, %} and } inside of comments in Turbo Pascal code
(instead of ignoring them).
Shift/reduce conflicts:
This grammar will produce a number of shift/reduce conflicts caused by
the error productions, since it does not specify unambigiously whether
errors are to be handled in global structures (definitions and rules)
or by enclosed syntactic constructs (e.g. symbols). Yacc will resolve
these conflicts in favour of shift, which is o.k. (it means that
errors will be caught in the innermost constructs with error handling,
thus reducing the amount of skipped symbols in resynchronization).
Error handling is done using the general method of Schreiner/Friedman
(see Schreiner/Friedman, "Introduction to compiler construction with
UNIX," 1985).
*/
%{
{$X+}
program Yacc;
(* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18,
6509 Schornsheim/Germany
All rights reserved *)
uses YaccLib, YaccBase, YaccMsgs, YaccSem, YaccTables, YaccParseTable;
(* TP Yacc - Yet Another Compiler Compiler for MS-DOS/Turbo Pascal
Version 3.0 as of April 91
Version 3.0a as of May 92 (bug fixes in precedence and type information
updates)
Author
Albert Graef
Schillerstr. 18
6509 Schornsheim/Germany
Graef@DMZRZU71.bitnet
Synopsis YACC [options] yacc-file[.Y] [output-file[.PAS]]
Options
/v "Verbose:" Yacc generates a readable description of the generated
parser, written to yacc-file with new extension .LST.
/d "Debug:" Yacc generates parser with debugging output.
Description
This is a reimplementation of the popular UNIX compiler generator
Yacc for MS-DOS and Turbo Pascal.
Differences from UNIX Yacc:
- Produces output code for Turbo Pascal, rather than for C.
- Does not support %union definitions. Instead, a value type is declared
by specifying the type identifier *itself* as the tag of a %token
or %type definition. Yacc will automatically generate an appropriate
yylval variable of a variant record type (YYSType) which is capable of
holding values of any of the types used in %token and %type.
Type checking is *very* strict. If you use type definitions, then
any symbol referred to in an action *must* have a type introduced
in a type definition. Either the symbol must have been assigned a
type in the definitions section, or the $<type-identifier> notation
must be used. The syntax of the %type definition has been changed
slightly to allow definitions of the form
%type <type-identifier>
(omitting the nonterminals) which may be used to declare types which
are not assigned to any grammar symbol, but are used with the
$<...> construct.
- The parse tables constructed by this Yacc version are slightly greater
than those constructed by UNIX Yacc, since a reduce action will only be
chosen as the default action if it is the *only* action in the state.
In difference, UNIX Yacc chooses a reduce action as the default action
whenever it is the only *reduce* action of the state (even if there are
other shift actions).
This solves a bug in UNIX Yacc that makes the generated parser start
error recovery too late with certain types of error productions (see
also Schreiner/Friedman, "Introduction to compiler construction with
UNIX," 1985). Also, errors will be caught sooner in most cases where
standard Yacc would carry out an additional (default) reduction before
detecting the error.
*)
%}
/* Lexical part of the Yacc language: */
%token
ID /* identifiers: {letter}{letter_or_digit}* */
C_ID /* identifier which forms left side of rule, i.e. is
followed by a colon */
LITERAL /* single character literal */
LITID /* multiple character literal */
NUMBER /* nonnegative integers: {digit}+ */
PTOKEN PLEFT PRIGHT PNONASSOC PTYPE PSTART PPREC
/* reserved words: PTOKEN=%token, etc. */
PP /* source sections separator %% */
LCURL /* curly braces: %{ and %} */
RCURL
',' ':' ';' '|' '{' '}' '<' '>' '='
/* literals */
ILLEGAL /* illegal input character */
%start grammar
%%
/* Lexical entities, those that may give rise to syntax errors are augmented
with error productions, and important symbols call yyerrok. */
id : ID
c_id : C_ID
literal : LITERAL
litid : LITID
number : NUMBER
ptoken : PTOKEN { yyerrok; }
pleft : PLEFT { yyerrok; }
pright : PRIGHT { yyerrok; }
pnonassoc : PNONASSOC { yyerrok; }
ptype : PTYPE { yyerrok; }
pstart : PSTART { yyerrok; }
pprec : PPREC
pp : PP { yyerrok; }
lcurl : LCURL
rcurl : RCURL
| error { error(rcurl_expected); }
comma : ','
colon : ':' { yyerrok; }
semicolon : ';' { yyerrok; }
bar : '|' { yyerrok; }
lbrace : '{'
rbrace : '}'
| error { error(rbrace_expected); }
langle : '<'
rangle : '>'
| error { error(rangle_expected); }
eq : '='
/* Syntax and semantic routines: */
grammar : defs pp
{ sort_types;
definitions;
next_section; }
rules
{ next_section;
generate_parser;
next_section; }
aux_procs
;
aux_procs : /* empty: aux_procs is optional */
| pp { copy_rest_of_file; }
;
defs : /* empty */
| defs def { yyerrok; }
| defs error { error(error_in_def); }
;
def : pstart id
{ startnt := ntsym($2); }
| pstart error
{ error(ident_expected); }
| lcurl { copy_code; } rcurl
| ptoken
{ act_prec := 0; }
tag token_list
| pleft
{ act_prec := new_prec_level(left); }
tag token_list
| pright
{ act_prec := new_prec_level(right); }
tag token_list
| pnonassoc
{ act_prec := new_prec_level(nonassoc); }
tag token_list
| ptype tag nonterm_list
| ptype tag
;
tag : /* empty: type tag is optional */
{ act_type := 0; }
| langle id rangle
{ act_type := $2; add_type($2); }
;
token_list : token_num
| token_list token_num
{ yyerrok; }
| token_list comma token_num
{ yyerrok; }
| error
{ error(ident_expected); }
| token_list error
{ error(error_in_def); }
| token_list comma error
{ error(ident_expected); }
;
token_num : literal
{ if act_type<>0 then
sym_type^[$1] := act_type;
if act_prec<>0 then
sym_prec^[$1] := act_prec; }
| litid
{ litsym($1, 0);
if act_type<>0 then
sym_type^[litsym($1, 0)] := act_type;
if act_prec<>0 then
sym_prec^[litsym($1, 0)] := act_prec; }
| id
{ litsym($1, 0);
if act_type<>0 then
sym_type^[litsym($1, 0)] := act_type;
if act_prec<>0 then
sym_prec^[litsym($1, 0)] := act_prec; }
| litid number
{ litsym($1, 0);
if act_type<>0 then
sym_type^[litsym($1, $2)] := act_type;
if act_prec<>0 then
sym_prec^[litsym($1, 0)] := act_prec; }
| id number
{ litsym($1, 0);
if act_type<>0 then
sym_type^[litsym($1, $2)] := act_type;
if act_prec<>0 then
sym_prec^[litsym($1, 0)] := act_prec; }
;
nonterm_list : nonterm
| nonterm_list nonterm
{ yyerrok; }
| nonterm_list comma nonterm
{ yyerrok; }
| error
{ error(ident_expected); }
| nonterm_list error
{ error(error_in_def); }
| nonterm_list comma error
{ error(ident_expected); }
;
nonterm : id
{ if act_type<>0 then
sym_type^[ntsym($1)] := act_type; }
;
rules : { next_section; }
rule1
| lcurl { copy_code; } rcurl
{ next_section; }
rule1
/* rules section may be prefixed
with `local' Turbo Pascal
declarations */
| rules rule
{ yyerrok; }
| error
{ error(error_in_rule); }
| rules error
{ error(error_in_rule); }
;
rule1 : c_id
{ start_rule(ntsym($1)); }
colon
{ start_body; }
body prec
{ end_body; }
;
rule : rule1
| bar
{ start_body; }
body prec
{ end_body; }
;
body : /* empty */
| body literal
{ add_symbol($2); yyerrok; }
| body litid
{ add_symbol(sym($2)); yyerrok; }
| body id
{ add_symbol(sym($2)); yyerrok; }
| body action
{ add_action; yyerrok; }
| body error
{ error(error_in_rule); }
;
action : lbrace { copy_action; } rbrace
| eq { copy_single_action; }
/* old language feature; code must be
single statement ending with `;' */
;
prec : /* empty */
| pprec literal
{ add_rule_prec($2); }
opt_action
| pprec litid
{ add_rule_prec(litsym($2, 0)); }
opt_action
| pprec id
{ add_rule_prec(litsym($2, 0)); }
opt_action
| prec semicolon
;
opt_action : /* empty */
| action
{ add_action; }
;
%%
(* Lexical analyzer (implemented in Turbo Pascal for maximum efficiency): *)
function yylex : integer;
function end_of_input : boolean;
begin
end_of_input := (cno>length(line)) and eof(yyin)
end(*end_of_input*);
procedure scan;
(* scan for nonempty character, skip comments *)
procedure scan_comment;
var p : integer;
begin
p := pos('*/', copy(line, cno, length(line)));
if p>0 then
cno := cno+succ(p)
else
begin
while (p=0) and not eof(yyin) do
begin
readln(yyin, line);
inc(lno);
p := pos('*/', line)
end;
if p=0 then
begin
cno := succ(length(line));
error(open_comment_at_eof);
end
else
cno := succ(succ(p))
end
end(*scan_comment*);
begin
while not end_of_input do
if cno<=length(line) then
case line[cno] of
' ', tab : inc(cno);
'/' :
if (cno<length(line)) and (line[succ(cno)]='*') then
begin
inc(cno, 2);
scan_comment
end
else
exit
else
exit
end
else
begin
readln(yyin, line);
inc(lno); cno := 1;
end
end(*scan*);
function scan_ident : integer;
(* scan an identifier *)
var
idstr : String;
begin
idstr := line[cno];
inc(cno);
while (cno<=length(line)) and (
('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or
('0'<=line[cno]) and (line[cno]<='9') or
(line[cno]='_') or
(line[cno]='.') ) do
begin
idstr := idstr+line[cno];
inc(cno)
end;
yylval := get_key(idstr);
scan;
if not end_of_input and (line[cno]=':') then
scan_ident := C_ID
else
scan_ident := ID
end(*scan_ident*);
function scan_literal: integer;
(* scan a literal, i.e. string *)
var
idstr : String;
oct_val : Byte;
begin
idstr := line[cno];
inc(cno);
while (cno<=length(line)) and (line[cno]<>idstr[1]) do
if line[cno]='\' then
if cno<length(line) then
begin
inc(cno);
case line[cno] of
'n' :
begin
idstr := idstr+nl;
inc(cno)
end;
'r' :
begin
idstr := idstr+cr;
inc(cno)
end;
't' :
begin
idstr := idstr+tab;
inc(cno)
end;
'b' :
begin
idstr := idstr+bs;
inc(cno)
end;
'f' :
begin
idstr := idstr+ff;
inc(cno)
end;
'0'..'7' :
begin
oct_val := ord(line[cno])-ord('0');
inc(cno);
while (cno<=length(line)) and
('0'<=line[cno]) and
(line[cno]<='7') do
begin
oct_val := oct_val*8+ord(line[cno])-ord('0');
inc(cno)
end;
idstr := idstr+chr(oct_val)
end
else
begin
idstr := idstr+line[cno];
inc(cno)
end
end
end
else
inc(cno)
else
begin
idstr := idstr+line[cno];
inc(cno)
end;
if cno>length(line) then
error(missing_string_terminator)
else
inc(cno);
if length(idstr)=2 then
begin
yylval := ord(idstr[2]);
scan_literal := LITERAL;
end
else if length(idstr)>1 then
begin
yylval := get_key(''''+copy(idstr, 2, pred(length(idstr)))+'''');
scan_literal := LITID;
end
else
scan_literal := ILLEGAL;
end(*scan_literal*);
function scan_num : integer;
(* scan an unsigned integer *)
var
numstr : String;
code : integer;
begin
numstr := line[cno];
inc(cno);
while (cno<=length(line)) and
('0'<=line[cno]) and (line[cno]<='9') do
begin
numstr := numstr+line[cno];
inc(cno)
end;
val(numstr, yylval, code);
if code=0 then
scan_num := NUMBER
else
scan_num := ILLEGAL;
end(*scan_num*);
function scan_keyword : integer;
(* scan %xy *)
function lookup(key : String; var tok : integer) : boolean;
(* table of Yacc keywords (unstropped): *)
const
no_of_entries = 11;
max_entry_length = 8;
keys : array [1..no_of_entries] of String[max_entry_length] = (
'0', '2', 'binary', 'left', 'nonassoc', 'prec', 'right',
'start', 'term', 'token', 'type');
toks : array [1..no_of_entries] of integer = (
PTOKEN, PNONASSOC, PNONASSOC, PLEFT, PNONASSOC, PPREC, PRIGHT,
PSTART, PTOKEN, PTOKEN, PTYPE);
var m, n, k : integer;
begin
(* binary search: *)
m := 1; n := no_of_entries;
lookup := true;
while m<=n do
begin
k := m+(n-m) div 2;
if key=keys[k] then
begin
tok := toks[k];
exit
end
else if key>keys[k] then
m := k+1
else
n := k-1
end;
lookup := false
end(*lookup*);
var
keywstr : String;
tok : integer;
begin
inc(cno);
if cno<=length(line) then
case line[cno] of
'<' :
begin
scan_keyword := PLEFT;
inc(cno)
end;
'>' :
begin
scan_keyword := PRIGHT;
inc(cno)
end;
'=' :
begin
scan_keyword := PPREC;
inc(cno)
end;
'%', '\' :
begin
scan_keyword := PP;
inc(cno)
end;
'{' :
begin
scan_keyword := LCURL;
inc(cno)
end;
'}' :
begin
scan_keyword := RCURL;
inc(cno)
end;
'A'..'Z', 'a'..'z', '0'..'9' :
begin
keywstr := line[cno];
inc(cno);
while (cno<=length(line)) and (
('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or
('0'<=line[cno]) and (line[cno]<='Z') ) do
begin
keywstr := keywstr+line[cno];
inc(cno)
end;
if lookup(keywstr, tok) then
scan_keyword := tok
else
scan_keyword := ILLEGAL
end;
else scan_keyword := ILLEGAL
end
else
scan_keyword := ILLEGAL;
end(*scan_keyword*);
function scan_char : integer;
(* scan any single character *)
begin
scan_char := ord(line[cno]);
inc(cno)
end(*scan_char*);
var lno0, cno0 : integer;
begin
tokleng := 0;
scan;
lno0 := lno; cno0 := cno;
if end_of_input then
yylex := 0
else
case line[cno] of
'A'..'Z', 'a'..'z', '_' : yylex := scan_ident;
'''', '"' : yylex := scan_literal;
'0'..'9' : yylex := scan_num;
'%', '\' : yylex := scan_keyword;
'=' :
if (cno<length(line)) and (line[succ(cno)]='{') then
begin
inc(cno);
yylex := scan_char
end
else
yylex := scan_char;
else yylex := scan_char;
end;
if lno=lno0 then
tokleng := cno-cno0
end(*yylex*);
(* Main program: *)
var i : Integer;
begin
(* sign-on: *)
writeln(sign_on);
(* parse command line: *)
if paramCount=0 then
begin
writeln(usage);
writeln(options);
halt(0);
end;
yfilename := '';
pasfilename := '';
for i := 1 to paramCount do
if copy(paramStr(i), 1, 1)='/' then
if upper(paramStr(i))='/V' then
verbose := true
else if upper(paramStr(i))='/D' then
debug := true
else
begin
writeln(invalid_option, paramStr(i));
halt(1);
end
else if yfilename='' then
yfilename := addExt(upper(paramStr(i)), 'Y')
else if pasfilename='' then
pasfilename := addExt(upper(paramStr(i)), 'PAS')
else
begin
writeln(illegal_no_args);
halt(1);
end;
if yfilename='' then
begin
writeln(illegal_no_args);
halt(1);
end;
if pasfilename='' then pasfilename := root(yfilename)+'.PAS';
lstfilename := root(yfilename)+'.LST';
(* open files: *)
assign(yyin, yfilename);
assign(yyout, pasfilename);
assign(yylst, lstfilename);
reset(yyin); if ioresult<>0 then fatal(cannot_open_file+yfilename);
rewrite(yyout); if ioresult<>0 then fatal(cannot_open_file+pasfilename);
rewrite(yylst); if ioresult<>0 then fatal(cannot_open_file+lstfilename);
(* search code template in current directory, then on path where Yacc
was executed from: *)
codfilename := 'YYPARSE.COD';
assign(yycod, codfilename);
reset(yycod);
if ioresult<>0 then
begin
codfilename := upper(path(paramStr(0)))+'YYPARSE.COD';
assign(yycod, codfilename);
reset(yycod);
if ioresult<>0 then fatal(cannot_open_file+codfilename);
end;
(* parse source grammar: *)
write('parse ... ');
lno := 0; cno := 1; line := '';
next_section;
if debug then writeln(yyout, '{$define yydebug}');
if yyparse=0 then
{ done }
else if yychar=0 then
error(unexpected_eof)
else
error(syntax_error);
if errors=0 then writeln('DONE');
(* close files: *)
close(yyin); close(yyout); close(yylst); close(yycod);
(* print statistics: *)
if errors>0 then
writeln( lno, ' lines, ',
errors, ' errors found.' )
else
begin
writeln( lno, ' lines, ',
n_rules-1, '/', max_rules-1, ' rules, ',
n_states, '/', max_states, ' s, ',
n_items, '/', max_items, ' i, ',
n_trans, '/', max_trans, ' t, ',
n_redns, '/', max_redns, ' r.');
if shift_reduce>0 then
writeln(shift_reduce, ' shift/reduce conflicts.');
if reduce_reduce>0 then
writeln(reduce_reduce, ' reduce/reduce conflicts.');
if never_reduced>0 then
writeln(never_reduced, ' rules never reduced.');
end;
if warnings>0 then writeln(warnings, ' warnings.');
writeln( n_bytes, '/', max_bytes, ' bytes of memory used.');
(* terminate: *)
if errors>0 then
begin
erase(yyout);
if ioresult<>0 then ;
end;
if file_size(lstfilename)=0 then
erase(yylst)
else
writeln('(see ', lstfilename, ' for more information)');
halt(errors);
end(*Yacc*).